{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# reg——alpha调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58816, std: 0.00284, params: {'reg_alpha': 0.1},\n",
       "  mean: -0.58793, std: 0.00267, params: {'reg_alpha': 1},\n",
       "  mean: -0.59198, std: 0.00232, params: {'reg_alpha': 10}],\n",
       " {'reg_alpha': 1},\n",
       " -0.587928576874992)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "\n",
    "path = 'Desktop/RentListingInquries/code/data/'\n",
    "test = pd.read_csv(path+'RentListingInquries_FE_test.csv')\n",
    "train = pd.read_csv(path+'RentListingInquries_FE_train.csv')\n",
    "y_train = train['interest_level']\n",
    "x_train = train.drop('interest_level',axis=1)\n",
    "\n",
    "reg_alpha = [ 0.1, 1, 10]    \n",
    "param_test4 = dict(reg_alpha=reg_alpha)\n",
    "\n",
    "xgb3 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=261,  #第二轮参数调整得到的n_estimators最优值\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "gsearch3 = GridSearchCV(xgb3, param_grid = param_test4, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch3.fit(x_train , y_train)\n",
    "\n",
    "gsearch3.grid_scores_, gsearch3.best_params_,     gsearch3.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "性能比之前又前进了一步，可以尝试着再将参数范围缩小一些"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58752, std: 0.00228, params: {'reg_alpha': 0.5},\n",
       "  mean: -0.58793, std: 0.00267, params: {'reg_alpha': 1},\n",
       "  mean: -0.58722, std: 0.00254, params: {'reg_alpha': 2},\n",
       "  mean: -0.58768, std: 0.00208, params: {'reg_alpha': 3},\n",
       "  mean: -0.58894, std: 0.00203, params: {'reg_alpha': 4}],\n",
       " {'reg_alpha': 2},\n",
       " -0.5872238029611496)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reg_alpha = [ 0.5 , 1 , 2 , 3 , 4  ]    \n",
    "param_test3 = dict(reg_alpha=reg_alpha)\n",
    "\n",
    "xgb3 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=261,  #第二轮参数调整得到的n_estimators最优值\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "gsearch3 = GridSearchCV(xgb3, param_grid = param_test3, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch3.fit(x_train , y_train)\n",
    "\n",
    "gsearch3.grid_scores_, gsearch3.best_params_,     gsearch3.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "这次性能又有了不小的提升，再次缩小范围"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58752, std: 0.00228, params: {'reg_alpha': 0.5},\n",
       "  mean: -0.58770, std: 0.00249, params: {'reg_alpha': 0.8},\n",
       "  mean: -0.58793, std: 0.00267, params: {'reg_alpha': 1},\n",
       "  mean: -0.58754, std: 0.00253, params: {'reg_alpha': 1.5},\n",
       "  mean: -0.58742, std: 0.00199, params: {'reg_alpha': 1.8},\n",
       "  mean: -0.58722, std: 0.00254, params: {'reg_alpha': 2},\n",
       "  mean: -0.58764, std: 0.00272, params: {'reg_alpha': 2.5},\n",
       "  mean: -0.58768, std: 0.00208, params: {'reg_alpha': 3}],\n",
       " {'reg_alpha': 2},\n",
       " -0.5872238029611496)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reg_alpha = [ 0.5 , 0.8 , 1 , 1.5 , 1.8 , 2 , 2.5,  3   ]    \n",
    "param_test3 = dict(reg_alpha=reg_alpha)\n",
    "\n",
    "xgb3 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=261,  #第二轮参数调整得到的n_estimators最优值\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "gsearch3 = GridSearchCV(xgb3, param_grid = param_test3, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch3.fit(x_train , y_train)\n",
    "\n",
    "gsearch3.grid_scores_, gsearch3.best_params_,     gsearch3.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "经过筛选，得出最佳的reg_alpha值为2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "module 'xgboost' has no attribute 'pridct_proba'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-14-1bf2ca670c48>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0my_pred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mxgb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpridct_proba\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m: module 'xgboost' has no attribute 'pridct_proba'"
     ]
    }
   ],
   "source": [
    "y_pred = xgb.pridct_proba(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
